In [1]:
!pip install -U git+https://github.com/albumentations-team/albumentations
Collecting git+https://github.com/albumentations-team/albumentations
  Cloning https://github.com/albumentations-team/albumentations to /tmp/pip-req-build-sg2hkvm8
  Running command git clone -q https://github.com/albumentations-team/albumentations /tmp/pip-req-build-sg2hkvm8
Requirement already satisfied, skipping upgrade: numpy>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from albumentations==0.4.6) (1.18.5)
Requirement already satisfied, skipping upgrade: scipy in /usr/local/lib/python3.6/dist-packages (from albumentations==0.4.6) (1.4.1)
Requirement already satisfied, skipping upgrade: scikit-image>=0.16.1 in /usr/local/lib/python3.6/dist-packages (from albumentations==0.4.6) (0.16.2)
Collecting imgaug>=0.4.0
  Downloading https://files.pythonhosted.org/packages/66/b1/af3142c4a85cba6da9f4ebb5ff4e21e2616309552caca5e8acefe9840622/imgaug-0.4.0-py2.py3-none-any.whl (948kB)
     |████████████████████████████████| 952kB 8.7MB/s 
Requirement already satisfied, skipping upgrade: PyYAML in /usr/local/lib/python3.6/dist-packages (from albumentations==0.4.6) (3.13)
Requirement already satisfied, skipping upgrade: opencv-python>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from albumentations==0.4.6) (4.1.2.30)
Requirement already satisfied, skipping upgrade: pillow>=4.3.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.16.1->albumentations==0.4.6) (7.0.0)
Requirement already satisfied, skipping upgrade: imageio>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.16.1->albumentations==0.4.6) (2.4.1)
Requirement already satisfied, skipping upgrade: matplotlib!=3.0.0,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.16.1->albumentations==0.4.6) (3.2.2)
Requirement already satisfied, skipping upgrade: networkx>=2.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.16.1->albumentations==0.4.6) (2.5)
Requirement already satisfied, skipping upgrade: PyWavelets>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.16.1->albumentations==0.4.6) (1.1.1)
Requirement already satisfied, skipping upgrade: Shapely in /usr/local/lib/python3.6/dist-packages (from imgaug>=0.4.0->albumentations==0.4.6) (1.7.1)
Requirement already satisfied, skipping upgrade: six in /usr/local/lib/python3.6/dist-packages (from imgaug>=0.4.0->albumentations==0.4.6) (1.15.0)
Requirement already satisfied, skipping upgrade: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image>=0.16.1->albumentations==0.4.6) (0.10.0)
Requirement already satisfied, skipping upgrade: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image>=0.16.1->albumentations==0.4.6) (2.8.1)
Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image>=0.16.1->albumentations==0.4.6) (1.2.0)
Requirement already satisfied, skipping upgrade: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib!=3.0.0,>=2.0.0->scikit-image>=0.16.1->albumentations==0.4.6) (2.4.7)
Requirement already satisfied, skipping upgrade: decorator>=4.3.0 in /usr/local/lib/python3.6/dist-packages (from networkx>=2.0->scikit-image>=0.16.1->albumentations==0.4.6) (4.4.2)
Building wheels for collected packages: albumentations
  Building wheel for albumentations (setup.py) ... done
  Created wheel for albumentations: filename=albumentations-0.4.6-cp36-none-any.whl size=69177 sha256=a8e3d557ae2427bef7668b4e54fbe544f5485c8d577cbfe2fbbd5f3099198842
  Stored in directory: /tmp/pip-ephem-wheel-cache-nloncp66/wheels/6f/77/82/86baf8aeda64a6de0f890cd0f2fb31acaf5545cc9c99ad21ba
Successfully built albumentations
Installing collected packages: imgaug, albumentations
  Found existing installation: imgaug 0.2.9
    Uninstalling imgaug-0.2.9:
      Successfully uninstalled imgaug-0.2.9
  Found existing installation: albumentations 0.1.12
    Uninstalling albumentations-0.1.12:
      Successfully uninstalled albumentations-0.1.12
Successfully installed albumentations-0.4.6 imgaug-0.4.0
In [2]:
import torch
import random
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Dataset
import math
import sys
import skimage.io
import os
import json
import seaborn as sns
import tensorboard
import time
import albumentations as  A
from torch.utils.tensorboard import SummaryWriter
from albumentations.pytorch.transforms import ToTensor
import torchvision


import imageio
import cv2 as cv
from matplotlib import pyplot as plt
In [3]:
%cd /content/drive/My Drive/Foodvisor/challenge
/content/drive/My Drive/Foodvisor/challenge

Data exploration

In [4]:
img_names = os.listdir('assignment_imgs')
img_annotations = open('img_annotations.json')
img_annotations = json.load(img_annotations)
info_df = pd.read_csv('label_mapping.csv')
#example
for k in range(3):
  imgex = img_names[k]
  image = skimage.io.imread('assignment_imgs/'+imgex)/255
  print('image size is ',np.shape(image))
  fig = plt.figure(figsize=(10,10))
  plt.imshow(image)
  nb_windows = len(img_annotations[imgex])
  color=plt.cm.rainbow(np.linspace(0,1,nb_windows))
  labels = []
  for i in range(nb_windows):
    annot_dict = img_annotations[imgex][i]
    bbox = annot_dict['box']
    id = annot_dict['id']
    x1 , y1, x2 , y2 = bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]
    print([x1,y1])
    print([x2,y2])
    plt.plot([x1, x1, x2, x2, x1], [y1, y2, y2, y1, y1], 'b-',color=color[i])
    label = info_df.loc[info_df['labelling_id']==id]['labelling_name_fr'].item()
    labels.append(label)
  plt.legend(labels)
  plt.show()
image size is  (600, 600, 3)
[51, 92]
[212, 277]
[238, 216]
[511, 518]
[0, 270]
[240, 599]
image size is  (600, 600, 3)
[36, 28]
[558, 451]
[0, 450]
[599, 599]
image size is  (600, 600, 3)
[180, 422]
[401, 596]
[30, 270]
[210, 599]
In [5]:
def create_df(info_df,img_annotations):
  """
  Create df with useful informations for tomatoes allergy detection
  """
  image_paths = []
  labels = []
  bboxs = []
  is_tomates = []
  nb_tomates = 0
  nb_objects = 0
  for key in img_annotations.keys():
    image_path = key
    image_paths.append(key)
    nb_windows = len(img_annotations[key])
    img_labels = []
    img_bboxs = []
    tomate_per_img = []
    for k in range(nb_windows):
      annot_dict = img_annotations[key][k]
      
      if not annot_dict['is_background']:
        bbox = annot_dict['box']
        id = annot_dict['id']
        fr_label = info_df.loc[info_df['labelling_id']==id]['labelling_name_fr'].item()
        # The images are going to be downsampled by 2 for computational reasons
        x1 , y1, h , w = bbox[0]//2, bbox[1]//2, bbox[2]//2-1, bbox[3]//2-1
        # sometimes the bounding box is too big
        h = min(h,299-y1)
        w = min(w,299-x1)
        # we use the coco bbox convention
        img_bboxs.append([x1,y1,w,h])
        img_labels.append(fr_label)
        """
        if annot_dict['is_background']:
          tomate_per_img.append(2)
        """
      
        if 'Tomate' in fr_label or 'Raviolis sauce tomate' in fr_label:
            tomate_per_img.append(1)
            nb_tomates = nb_tomates + 1
        else:
          nb_objects = nb_objects + 1
          tomate_per_img.append(0)
    is_tomates.append(tomate_per_img)
    labels.append(img_labels)
    bboxs.append(img_bboxs)
  data= {'image_path':image_paths,'label':labels,'bbox':bboxs,'is_tomato':is_tomates}
  df= pd.DataFrame(data)
  return df, nb_tomates , nb_objects
In [6]:
df , nb_tomates, nb_objects = create_df(info_df,img_annotations)
#Setting the weights for dealing with class imbalance
w_1 = nb_objects/nb_tomates
w_0 = 1
print("The weights of errors for tomatoe samples:",w_1 )
The weights of errors for tomatoe samples: 10.538896746817539
In [7]:
print('proportion of tomatoes ',100*(nb_tomates/(nb_objects+nb_tomates)))
proportion of tomatoes  8.666339789164011

Data preprocessing:

In [8]:
class FoodDataset(Dataset):
    """Foodvisor tomatoe detection dataset."""

    def __init__(self,image_dir,info_df ,input_size=(300,300), transform=None, weights=[w_0,w_1]):
        """
        Args:
            info_df (Dataframe): Dataframe of the image paths and annotations.
            image_dir (string): Directory with all the images
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.info_df = info_df
        self.image_dir = image_dir
        self.input_size=input_size
        self.transform = transform
        self.weights = weights

    def __len__(self):
        return len(self.info_df)
    
    def load_image(self, idx):
        """Generate an image from the specs of the given image ID.
        
        """
        image_id = self.info_df.loc[idx, "image_path"]    
        img_name = os.path.join(self.image_dir,image_id)
        image = skimage.io.imread(img_name)
         
        return image

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image_path = self.info_df.loc[idx, "image_path"]    
        img_name = os.path.join(self.image_dir,image_path)
        image = skimage.io.imread(img_name)

        labels = self.info_df.loc[idx, "is_tomato"]

        bboxes  = self.info_df.loc[idx, "bbox"]
        resize_transform = A.Resize(self.input_size[0],self.input_size[1])(image=image)
        image = resize_transform['image']
          
        if self.transform:
          data = {"image": image,"bboxes":bboxes,'class_labels':labels}
          augmented = self.transform(**data)
          image = augmented['image']
          bboxes = augmented['bboxes']
          labels = augmented['class_labels']
        # Normalization and converting everything to Tensor
        simple_transform = simple_preprocess()(image= image)
        image = simple_transform['image']
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.Tensor([idx])

        # Use the COCO template for targets to be able to evaluate the model with COCO API
        area = bboxes[:,2]*bboxes[:,3]
        bboxes[:,2] = bboxes[:,0]+bboxes[:,2]
        bboxes[:,3] = bboxes[:,1] + bboxes[:,3]
        target = {"boxes": bboxes,"labels": labels,"image_id": image_id,"area": area,"iscrowd": torch.as_tensor([0], dtype=torch.int64)}

        return image, target, self.weights[1 in target['labels']]
In [9]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = {'mean':mean,'std':std}
def complex_preprocess():
    return A.Compose([A.HorizontalFlip(),
                #A.RandomCrop(width=600, height=600,p=.9),       
                A.VerticalFlip(),
                #A.Crop(x_min = 0, y_min = 0, x_max = 300, y_max = 300),
                #A.CLAHE(clip_limit=4,p=0.3),
                #A.ChannelDropout(p=0.2),
                #A.HueSaturationValue(p=0.2),
                #A.Posterize(p=0.2),
                #A.RGBShift(50,50,50,p=0.3),
                A.RandomGamma((40,120)),
                A.GaussNoise(p=0.3),
                A.Blur(blur_limit = 8,p=0.3),
                A.RandomContrast((-0.4,0.4))], bbox_params=A.BboxParams(format='coco',label_fields=['class_labels']))
def simple_preprocess():
    return A.Compose([ToTensor()
            ])
#normalize=normalize
In [10]:
def to_numpy(x):
    if not (isinstance(x, np.ndarray) or x is None):
        if x.is_cuda: 
            x = x.data.cpu()
        x = x.numpy()
    return x
def unnormalize(img,mean = mean, std = std,image_size=(300,300,3)):

  "Unnormalize a given image tensor and make it plotable"
  img= img.permute(1, 2, 0)
  # plt imshow only accept positive values
  unnormalized_img = torch.tensor(np.ones(image_size))
  for c in range(3):
    unnormalized_img[:,:,c] = std[c]*img[:,:,c] +mean[c]
  return to_numpy(unnormalized_img)
In [11]:
img_dir = 'assignment_imgs'
dataset = FoodDataset(image_dir = img_dir ,info_df = df ,input_size=(300,300,3), transform=None)
In [12]:
# Augmentation vizualisation NB: Mettre conventions de coco
for i in range(20):
    fig = plt.Figure(figsize=(20,20))
    img, target,weights = dataset.__getitem__(i)
    print(weights)
    bboxes = target['boxes']
    unnormalized_img = unnormalize(img,mean = (0,0,0),std = (1,1,1))
    plt.imshow(to_numpy(unnormalized_img))
    nb_window = len(bboxes)
    print(target['labels'])
    for k in range(nb_window):
      bbox = bboxes[k]
      x1 , y1, x2 , y2 = bbox[0], bbox[1], bbox[2], bbox[3]
      plt.plot([x1, x1, x2, x2, x1], [y1, y2, y2, y1, y1], 'b-')
    plt.show()
Output hidden; open in https://colab.research.google.com to view.
In [ ]:
#splitting into train, valid and test
from sklearn.model_selection import train_test_split

train_val_df, test_df = train_test_split(df, test_size=0.1, random_state=42, shuffle=True)
train_df , val_df = train_test_split(train_val_df, test_size=0.2, random_state=42, shuffle=True)
train_df = train_df.reset_index()
val_df = val_df.reset_index()
test_df = test_df.reset_index()

utils

In [ ]:
from collections import defaultdict, deque
import datetime
import pickle
import time

import torch
import torch.distributed as dist

import errno
import os


class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]

    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        buffer = tensor.cpu().numpy().tobytes()[:size]
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict


class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))


def collate_fn(batch):
    return tuple(zip(*batch))


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):

    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)


def mkdir(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()


def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()


def is_main_process():
    return get_rank() == 0


def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)


def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

Training utils

In [ ]:
def accuracy(gt_labels, pred_labels):
    return len(np.where(gt_labels == pred_labels)[0])/len(gt_labels)
from sklearn.metrics import roc_auc_score,f1_score, recall_score, precision_score, confusion_matrix
def compute_metrics(true_label,pred_label):
  recall = recall_score(true_label,pred_label)
  precision = precision_score(true_label,pred_label)
  f1_score = 2 * (precision * recall) / (precision + recall)
  tn, fp, fn, tp = confusion_matrix(true_label, pred_label).ravel()
  sensitivity = tp / (tp + fn)
  specificity = tn / (tn + fp)
  return {'recall':recall,'precision':precision,'f1':f1_score,'sensitivity':sensitivity,'specificity':specificity}
def IoU(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA1, yA1, xA2, yA2 = boxA 
    xB1, yB1, xB2, yB2 = boxB
    x1 = max(xA1, xB1)
    y1 = max(yA1, yB1)
    x2 = min(xA2, xB2)
    y2 = min(yA2, yB2)
    # compute the area of intersection rectangle
    intersection = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (xA2 - xA1 + 1) * (yA2 - yA1 + 1)
    boxBArea = (xB2 - xB1 + 1) * (yB2 - yB1 + 1)
    union = boxAArea + boxBArea - intersection
    return intersection / union
In [ ]:
def train_one_epoch(model, optimizer,criterion,weights, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    for i, values in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images, targets, weights = values
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        weights = torch.Tensor(weights).to(device)

        # Feed the training samples to the model and compute the losses
        loss_dict = model(images, targets)
        loss_dict['loss_classifier'] = 10*loss_dict['loss_classifier']
         
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        # Pytorch function to initialize optimizer
        optimizer.zero_grad()
        # Compute gradients or the backpropagation
        losses.backward()
        # Update current gradient
        optimizer.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

        # Record losses to plot learning curves
        if i == 0: 
            history = {key: val.cpu().detach() for key, val in loss_dict_reduced.items()}
            history['loss'] = losses_reduced.cpu().detach()
        else:
            for key, val in loss_dict_reduced.items():history[key] += val.cpu().detach()
            history['loss'] += losses_reduced.cpu().detach()
    return history
In [ ]:
if torch.cuda.is_available():
    device='cuda'
else:
    device = 'cpu'

def validate_one_epoch(model, data_loader, device=device, print_freq=100):
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    header = "Validation: "
    for i, values in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
        images, targets, weights = values
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)

        # Record losses to plot learning curves
        if i == 0: 
            history = {key: val.cpu().detach() for key, val in loss_dict_reduced.items()}
            history['loss'] = losses_reduced.cpu().detach()
        else:
            for key, val in loss_dict_reduced.items():history[key] += val.cpu().detach()
            history['loss'] += losses_reduced.cpu().detach()
    return history
    
def evaluate(model, loader, device,batch_size):
    model.eval()
    gt_labels, pred_labels = [], []
    gt_boxes, pred_boxes = np.empty(4), np.empty(4)
    for i, sample in enumerate(loader, 1):
        images, targets, weights = sample
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        with torch.no_grad(): 
          prediction = model(images)
        for k in range(batch_size):
          istomate_target = 1 in to_numpy(targets[k]['labels'])
          istomate_pred = 1 in to_numpy(prediction[k]['labels'])
          gt_labels.append(int(istomate_target))
          pred_labels.append(int(istomate_pred))
          if len(to_numpy(prediction[k]['boxes']))>0:
            gt_boxes = np.vstack((gt_boxes, target['boxes'][k]))
            pred_boxes = np.vstack((pred_boxes, prediction[k]['boxes'][0].cpu())) 
    gt_boxes = np.array(gt_boxes)
    pred_boxes = np.array(pred_boxes)    
    pred_labels = np.array(pred_labels)
    gt_labels = np.array(gt_labels)
    d =  compute_metrics(gt_labels, pred_labels)
    print("classification metric over validation set:",d)
    average_iou = np.mean([IoU(gt_boxes[i], pred_boxes[i]) for i in range(len(gt_boxes))])
    print("Average IoU over {} set: {:.2f}".format('validation data', average_iou))
    acc = accuracy(gt_labels, pred_labels)
    print('Accuracy over {} set: {:.3f}'.format('validation_data', acc))
    return d, average_iou, acc

Model

In [ ]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 
# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)

# replace the classifier with a new one, that has num_classes which is user-defined
num_classes = 2  # 2 classes (tomatoe or no tomatoes ) 


# get number of input channels for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
In [ ]:
def count_params(model):
    """Count the number of parameters"""
    param_count = np.sum([torch.numel(p) for p in model.parameters()])
    return param_count
print('Total parameters of Faster RCNN: ',count_params(model))
Total parameters of Faster RCNN:  41299161

Training

In [ ]:
# our dataset has 3 classes
num_classes = 2
# move model to the right device
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.SGD(params, lr=0.0001,
                            momentum=0.9, weight_decay=0.00005)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=5,
                                               gamma=0.5)
criterion = torch.nn.BCELoss()
weights = [w_0,w_1]
In [ ]:
batch_size = 4
#Creating datasets from dataframes
train_dataset = FoodDataset(image_dir = img_dir ,info_df = train_df ,input_size=(300,300), transform=complex_preprocess())
val_dataset = FoodDataset(image_dir = img_dir ,info_df = val_df ,input_size=(300,300), transform=None)
#Creating DataLoaders
train_loader = torch.utils.data.DataLoader(train_dataset, 
                                           batch_size=batch_size, shuffle=True,sampler=None,
                                           collate_fn=collate_fn,drop_last=True)
val_loader = torch.utils.data.DataLoader(val_dataset, 
                                           batch_size=batch_size, shuffle=True,sampler=None,
                                           collate_fn=collate_fn,drop_last=True)
In [ ]:
num_epochs = 50
save_frequency = 1
for epoch in range(num_epochs):

    # Train for one epoch, printing every 10 iterations
    train_his_ = train_one_epoch(model, optimizer,criterion,weights, train_loader, device, epoch, print_freq=100)

    # Compute losses over the validation set
    with torch.no_grad():
      val_his_ = validate_one_epoch(model, val_loader, device, print_freq=100)
      d, average_iou, acc = evaluate(model,val_loader,device, batch_size)
    val_his_['f1'] = d['f1']
    val_his_['average_iou'] = average_iou
    val_his_['acc'] = acc
    # Update the learning rate
    lr_scheduler.step()

    # Store loss values to plot learning curves afterwork.
    if epoch == 0: 
        train_history = {k: [v] for k, v in train_his_.items()}
        val_history = {k: [v] for k, v in val_his_.items()}
    else: 
        for k, v in train_his_.items():train_history[k] += [v]
        for k, v in val_his_.items():val_history[k] += [v]

    # Save the model
    if epoch % save_frequency==0:
      session_name = 'Test_session' + '_' + time.strftime('%m.%d %Hh%M')
      save_path = "/content/drive/My Drive/Foodvisor/challenge/model_zoo/"
      model_path = save_path  + session_name + '_'+ 'Epoch'+ str(epoch)+ '_model.pth.tar'
      torch.save(model, model_path)

    torch.cuda.empty_cache()
Epoch: [0]  [  0/540]  eta: 0:03:09  lr: 0.000100  loss: 7.9216 (7.9216)  loss_classifier: 7.2014 (7.2014)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.6956 (0.6956)  loss_rpn_box_reg: 0.0245 (0.0245)  time: 0.3511  data: 0.0735  max mem: 8062
Epoch: [0]  [100/540]  eta: 0:02:34  lr: 0.000100  loss: 0.5064 (1.9037)  loss_classifier: 0.0321 (1.2847)  loss_box_reg: 0.0000 (0.0067)  loss_objectness: 0.3670 (0.5933)  loss_rpn_box_reg: 0.0125 (0.0191)  time: 0.3460  data: 0.0824  max mem: 8062
Epoch: [0]  [200/540]  eta: 0:01:59  lr: 0.000100  loss: 0.1129 (1.1976)  loss_classifier: 0.0145 (0.7951)  loss_box_reg: 0.0000 (0.0126)  loss_objectness: 0.0868 (0.3703)  loss_rpn_box_reg: 0.0200 (0.0196)  time: 0.3440  data: 0.0811  max mem: 8062
Epoch: [0]  [300/540]  eta: 0:01:24  lr: 0.000100  loss: 0.1753 (0.9257)  loss_classifier: 0.0674 (0.6171)  loss_box_reg: 0.0000 (0.0149)  loss_objectness: 0.0675 (0.2747)  loss_rpn_box_reg: 0.0157 (0.0189)  time: 0.3530  data: 0.0863  max mem: 8062
Epoch: [0]  [400/540]  eta: 0:00:49  lr: 0.000100  loss: 0.2682 (0.7831)  loss_classifier: 0.1651 (0.5245)  loss_box_reg: 0.0049 (0.0159)  loss_objectness: 0.0641 (0.2243)  loss_rpn_box_reg: 0.0112 (0.0184)  time: 0.3506  data: 0.0881  max mem: 8062
Epoch: [0]  [500/540]  eta: 0:00:14  lr: 0.000100  loss: 0.3165 (0.7020)  loss_classifier: 0.1971 (0.4745)  loss_box_reg: 0.0172 (0.0172)  loss_objectness: 0.0696 (0.1925)  loss_rpn_box_reg: 0.0192 (0.0178)  time: 0.3573  data: 0.0929  max mem: 8062
Epoch: [0]  [539/540]  eta: 0:00:00  lr: 0.000100  loss: 0.1837 (0.6788)  loss_classifier: 0.0459 (0.4595)  loss_box_reg: 0.0000 (0.0178)  loss_objectness: 0.0635 (0.1836)  loss_rpn_box_reg: 0.0179 (0.0179)  time: 0.3568  data: 0.0933  max mem: 8062
Epoch: [0] Total time: 0:03:09 (0.3515 s / it)
Validation:   [  0/135]  eta: 0:00:26  loss: 0.1348 (0.1348)  loss_classifier: 0.0186 (0.0186)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0889 (0.0889)  loss_rpn_box_reg: 0.0273 (0.0273)  time: 0.1959  data: 0.0618  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.0991 (0.1272)  loss_classifier: 0.0171 (0.0254)  loss_box_reg: 0.0088 (0.0223)  loss_objectness: 0.0578 (0.0630)  loss_rpn_box_reg: 0.0169 (0.0165)  time: 0.2007  data: 0.0658  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.1240 (0.1275)  loss_classifier: 0.0084 (0.0248)  loss_box_reg: 0.0000 (0.0216)  loss_objectness: 0.0743 (0.0647)  loss_rpn_box_reg: 0.0152 (0.0164)  time: 0.2057  data: 0.0711  max mem: 8062
Validation:  Total time: 0:00:27 (0.2023 s / it)
classification metric over validation set: {'recall': 0.968421052631579, 'precision': 0.3865546218487395, 'f1': 0.5525525525525525, 'sensitivity': 0.968421052631579, 'specificity': 0.6719101123595506}
Average IoU over validation data set: 0.12
Accuracy over validation_data set: 0.724
Epoch: [1]  [  0/540]  eta: 0:03:11  lr: 0.000100  loss: 0.0830 (0.0830)  loss_classifier: 0.0161 (0.0161)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0509 (0.0509)  loss_rpn_box_reg: 0.0160 (0.0160)  time: 0.3548  data: 0.0869  max mem: 8062
Epoch: [1]  [100/540]  eta: 0:02:35  lr: 0.000100  loss: 0.4664 (0.4422)  loss_classifier: 0.3534 (0.3300)  loss_box_reg: 0.0262 (0.0294)  loss_objectness: 0.0644 (0.0657)  loss_rpn_box_reg: 0.0163 (0.0170)  time: 0.3493  data: 0.0842  max mem: 8062
Epoch: [1]  [200/540]  eta: 0:01:59  lr: 0.000100  loss: 0.3530 (0.3924)  loss_classifier: 0.2376 (0.2851)  loss_box_reg: 0.0268 (0.0252)  loss_objectness: 0.0600 (0.0650)  loss_rpn_box_reg: 0.0149 (0.0172)  time: 0.3467  data: 0.0839  max mem: 8062
Epoch: [1]  [300/540]  eta: 0:01:24  lr: 0.000100  loss: 0.3198 (0.3776)  loss_classifier: 0.2598 (0.2745)  loss_box_reg: 0.0204 (0.0231)  loss_objectness: 0.0580 (0.0631)  loss_rpn_box_reg: 0.0169 (0.0169)  time: 0.3572  data: 0.0942  max mem: 8062
Epoch: [1]  [400/540]  eta: 0:00:49  lr: 0.000100  loss: 0.3140 (0.3725)  loss_classifier: 0.2384 (0.2712)  loss_box_reg: 0.0221 (0.0233)  loss_objectness: 0.0509 (0.0614)  loss_rpn_box_reg: 0.0153 (0.0166)  time: 0.3468  data: 0.0809  max mem: 8062
Epoch: [1]  [500/540]  eta: 0:00:13  lr: 0.000100  loss: 0.1165 (0.3674)  loss_classifier: 0.0249 (0.2663)  loss_box_reg: 0.0000 (0.0234)  loss_objectness: 0.0524 (0.0611)  loss_rpn_box_reg: 0.0134 (0.0167)  time: 0.3479  data: 0.0856  max mem: 8062
Epoch: [1]  [539/540]  eta: 0:00:00  lr: 0.000100  loss: 0.0845 (0.3570)  loss_classifier: 0.0033 (0.2574)  loss_box_reg: 0.0000 (0.0225)  loss_objectness: 0.0437 (0.0606)  loss_rpn_box_reg: 0.0145 (0.0166)  time: 0.3516  data: 0.0889  max mem: 8062
Epoch: [1] Total time: 0:03:09 (0.3502 s / it)
Validation:   [  0/135]  eta: 0:00:27  loss: 0.0466 (0.0466)  loss_classifier: 0.0004 (0.0004)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0328 (0.0328)  loss_rpn_box_reg: 0.0133 (0.0133)  time: 0.2003  data: 0.0684  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.0604 (0.1136)  loss_classifier: 0.0005 (0.0226)  loss_box_reg: 0.0000 (0.0212)  loss_objectness: 0.0372 (0.0545)  loss_rpn_box_reg: 0.0106 (0.0153)  time: 0.1978  data: 0.0638  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.0918 (0.1198)  loss_classifier: 0.0025 (0.0251)  loss_box_reg: 0.0000 (0.0231)  loss_objectness: 0.0433 (0.0557)  loss_rpn_box_reg: 0.0139 (0.0159)  time: 0.2014  data: 0.0648  max mem: 8062
Validation:  Total time: 0:00:27 (0.2011 s / it)
classification metric over validation set: {'recall': 0.7894736842105263, 'precision': 0.6818181818181818, 'f1': 0.7317073170731707, 'sensitivity': 0.7894736842105263, 'specificity': 0.9213483146067416}
Average IoU over validation data set: 0.12
Accuracy over validation_data set: 0.898
Epoch: [2]  [  0/540]  eta: 0:03:17  lr: 0.000100  loss: 0.0936 (0.0936)  loss_classifier: 0.0305 (0.0305)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0491 (0.0491)  loss_rpn_box_reg: 0.0140 (0.0140)  time: 0.3662  data: 0.0966  max mem: 8062
Epoch: [2]  [100/540]  eta: 0:02:34  lr: 0.000100  loss: 0.3163 (0.4014)  loss_classifier: 0.2350 (0.2964)  loss_box_reg: 0.0171 (0.0291)  loss_objectness: 0.0563 (0.0582)  loss_rpn_box_reg: 0.0184 (0.0177)  time: 0.3528  data: 0.0885  max mem: 8062
Epoch: [2]  [200/540]  eta: 0:01:59  lr: 0.000100  loss: 0.0828 (0.3570)  loss_classifier: 0.0243 (0.2567)  loss_box_reg: 0.0000 (0.0241)  loss_objectness: 0.0396 (0.0593)  loss_rpn_box_reg: 0.0131 (0.0168)  time: 0.3445  data: 0.0810  max mem: 8062
Epoch: [2]  [300/540]  eta: 0:01:24  lr: 0.000100  loss: 0.2743 (0.3406)  loss_classifier: 0.1945 (0.2425)  loss_box_reg: 0.0201 (0.0230)  loss_objectness: 0.0494 (0.0587)  loss_rpn_box_reg: 0.0132 (0.0164)  time: 0.3587  data: 0.0951  max mem: 8062
Epoch: [2]  [400/540]  eta: 0:00:49  lr: 0.000100  loss: 0.2011 (0.3497)  loss_classifier: 0.0658 (0.2510)  loss_box_reg: 0.0000 (0.0236)  loss_objectness: 0.0518 (0.0587)  loss_rpn_box_reg: 0.0162 (0.0164)  time: 0.3489  data: 0.0857  max mem: 8062
Epoch: [2]  [500/540]  eta: 0:00:14  lr: 0.000100  loss: 0.1433 (0.3401)  loss_classifier: 0.0862 (0.2430)  loss_box_reg: 0.0000 (0.0231)  loss_objectness: 0.0468 (0.0576)  loss_rpn_box_reg: 0.0124 (0.0164)  time: 0.3557  data: 0.0923  max mem: 8062
Epoch: [2]  [539/540]  eta: 0:00:00  lr: 0.000100  loss: 0.1222 (0.3384)  loss_classifier: 0.0567 (0.2415)  loss_box_reg: 0.0000 (0.0230)  loss_objectness: 0.0477 (0.0576)  loss_rpn_box_reg: 0.0140 (0.0163)  time: 0.3564  data: 0.0918  max mem: 8062
Epoch: [2] Total time: 0:03:10 (0.3524 s / it)
Validation:   [  0/135]  eta: 0:00:28  loss: 0.0788 (0.0788)  loss_classifier: 0.0135 (0.0135)  loss_box_reg: 0.0166 (0.0166)  loss_objectness: 0.0363 (0.0363)  loss_rpn_box_reg: 0.0124 (0.0124)  time: 0.2100  data: 0.0686  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.1094 (0.1125)  loss_classifier: 0.0180 (0.0221)  loss_box_reg: 0.0091 (0.0219)  loss_objectness: 0.0481 (0.0527)  loss_rpn_box_reg: 0.0140 (0.0158)  time: 0.2050  data: 0.0689  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.1122 (0.1143)  loss_classifier: 0.0038 (0.0220)  loss_box_reg: 0.0000 (0.0226)  loss_objectness: 0.0428 (0.0539)  loss_rpn_box_reg: 0.0130 (0.0157)  time: 0.2002  data: 0.0631  max mem: 8062
Validation:  Total time: 0:00:27 (0.2020 s / it)
classification metric over validation set: {'recall': 0.9473684210526315, 'precision': 0.5056179775280899, 'f1': 0.6593406593406593, 'sensitivity': 0.9473684210526315, 'specificity': 0.802247191011236}
Average IoU over validation data set: 0.13
Accuracy over validation_data set: 0.828
Epoch: [3]  [  0/540]  eta: 0:03:14  lr: 0.000100  loss: 0.1153 (0.1153)  loss_classifier: 0.0662 (0.0662)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0380 (0.0380)  loss_rpn_box_reg: 0.0111 (0.0111)  time: 0.3604  data: 0.0949  max mem: 8062
Epoch: [3]  [100/540]  eta: 0:02:34  lr: 0.000100  loss: 0.2505 (0.2914)  loss_classifier: 0.1699 (0.2043)  loss_box_reg: 0.0103 (0.0183)  loss_objectness: 0.0566 (0.0531)  loss_rpn_box_reg: 0.0154 (0.0157)  time: 0.3533  data: 0.0891  max mem: 8062
Epoch: [3]  [200/540]  eta: 0:01:59  lr: 0.000100  loss: 0.2473 (0.3129)  loss_classifier: 0.1825 (0.2242)  loss_box_reg: 0.0159 (0.0212)  loss_objectness: 0.0354 (0.0520)  loss_rpn_box_reg: 0.0111 (0.0155)  time: 0.3494  data: 0.0849  max mem: 8062
Epoch: [3]  [300/540]  eta: 0:01:24  lr: 0.000100  loss: 0.1344 (0.3132)  loss_classifier: 0.0868 (0.2233)  loss_box_reg: 0.0000 (0.0213)  loss_objectness: 0.0415 (0.0531)  loss_rpn_box_reg: 0.0134 (0.0155)  time: 0.3478  data: 0.0843  max mem: 8062
Epoch: [3]  [400/540]  eta: 0:00:49  lr: 0.000100  loss: 0.2827 (0.3252)  loss_classifier: 0.2123 (0.2312)  loss_box_reg: 0.0204 (0.0227)  loss_objectness: 0.0567 (0.0553)  loss_rpn_box_reg: 0.0167 (0.0160)  time: 0.3641  data: 0.1011  max mem: 8062
Epoch: [3]  [500/540]  eta: 0:00:14  lr: 0.000100  loss: 0.0994 (0.3252)  loss_classifier: 0.0581 (0.2310)  loss_box_reg: 0.0000 (0.0227)  loss_objectness: 0.0451 (0.0554)  loss_rpn_box_reg: 0.0133 (0.0161)  time: 0.3570  data: 0.0904  max mem: 8062
Epoch: [3]  [539/540]  eta: 0:00:00  lr: 0.000100  loss: 0.3677 (0.3311)  loss_classifier: 0.2823 (0.2363)  loss_box_reg: 0.0351 (0.0235)  loss_objectness: 0.0452 (0.0552)  loss_rpn_box_reg: 0.0128 (0.0162)  time: 0.3478  data: 0.0841  max mem: 8062
Epoch: [3] Total time: 0:03:10 (0.3530 s / it)
Validation:   [  0/135]  eta: 0:00:25  loss: 0.0393 (0.0393)  loss_classifier: 0.0016 (0.0016)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0276 (0.0276)  loss_rpn_box_reg: 0.0102 (0.0102)  time: 0.1908  data: 0.0577  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.1030 (0.1105)  loss_classifier: 0.0230 (0.0226)  loss_box_reg: 0.0279 (0.0237)  loss_objectness: 0.0395 (0.0490)  loss_rpn_box_reg: 0.0122 (0.0152)  time: 0.2021  data: 0.0656  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.0885 (0.1117)  loss_classifier: 0.0161 (0.0217)  loss_box_reg: 0.0116 (0.0226)  loss_objectness: 0.0464 (0.0518)  loss_rpn_box_reg: 0.0164 (0.0156)  time: 0.2038  data: 0.0684  max mem: 8062
Validation:  Total time: 0:00:27 (0.2029 s / it)
classification metric over validation set: {'recall': 0.9894736842105263, 'precision': 0.3884297520661157, 'f1': 0.5578635014836795, 'sensitivity': 0.9894736842105263, 'specificity': 0.6674157303370787}
Average IoU over validation data set: 0.10
Accuracy over validation_data set: 0.724
Epoch: [4]  [  0/540]  eta: 0:03:05  lr: 0.000100  loss: 1.3375 (1.3375)  loss_classifier: 1.1215 (1.1215)  loss_box_reg: 0.1119 (0.1119)  loss_objectness: 0.0774 (0.0774)  loss_rpn_box_reg: 0.0267 (0.0267)  time: 0.3442  data: 0.0799  max mem: 8062
Epoch: [4]  [100/540]  eta: 0:02:36  lr: 0.000100  loss: 0.2434 (0.2808)  loss_classifier: 0.1269 (0.1924)  loss_box_reg: 0.0000 (0.0188)  loss_objectness: 0.0631 (0.0545)  loss_rpn_box_reg: 0.0160 (0.0151)  time: 0.3644  data: 0.0989  max mem: 8062
Epoch: [4]  [200/540]  eta: 0:02:00  lr: 0.000100  loss: 0.0871 (0.3061)  loss_classifier: 0.0443 (0.2143)  loss_box_reg: 0.0000 (0.0223)  loss_objectness: 0.0395 (0.0535)  loss_rpn_box_reg: 0.0163 (0.0160)  time: 0.3561  data: 0.0924  max mem: 8062
Epoch: [4]  [300/540]  eta: 0:01:24  lr: 0.000100  loss: 0.1421 (0.3153)  loss_classifier: 0.1029 (0.2239)  loss_box_reg: 0.0000 (0.0235)  loss_objectness: 0.0399 (0.0521)  loss_rpn_box_reg: 0.0127 (0.0158)  time: 0.3518  data: 0.0874  max mem: 8062
Epoch: [4]  [400/540]  eta: 0:00:49  lr: 0.000100  loss: 0.0926 (0.3207)  loss_classifier: 0.0219 (0.2282)  loss_box_reg: 0.0000 (0.0244)  loss_objectness: 0.0331 (0.0521)  loss_rpn_box_reg: 0.0094 (0.0159)  time: 0.3541  data: 0.0904  max mem: 8062
Epoch: [4]  [500/540]  eta: 0:00:14  lr: 0.000100  loss: 0.3312 (0.3228)  loss_classifier: 0.2262 (0.2299)  loss_box_reg: 0.0239 (0.0244)  loss_objectness: 0.0434 (0.0523)  loss_rpn_box_reg: 0.0148 (0.0161)  time: 0.3469  data: 0.0839  max mem: 8062
Epoch: [4]  [539/540]  eta: 0:00:00  lr: 0.000100  loss: 0.2463 (0.3195)  loss_classifier: 0.1077 (0.2264)  loss_box_reg: 0.0001 (0.0240)  loss_objectness: 0.0483 (0.0529)  loss_rpn_box_reg: 0.0186 (0.0161)  time: 0.3523  data: 0.0879  max mem: 8062
Epoch: [4] Total time: 0:03:11 (0.3547 s / it)
Validation:   [  0/135]  eta: 0:00:25  loss: 0.1327 (0.1327)  loss_classifier: 0.0397 (0.0397)  loss_box_reg: 0.0425 (0.0425)  loss_objectness: 0.0342 (0.0342)  loss_rpn_box_reg: 0.0163 (0.0163)  time: 0.1923  data: 0.0595  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.0591 (0.1023)  loss_classifier: 0.0027 (0.0198)  loss_box_reg: 0.0000 (0.0215)  loss_objectness: 0.0342 (0.0459)  loss_rpn_box_reg: 0.0113 (0.0151)  time: 0.1939  data: 0.0579  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.1352 (0.1120)  loss_classifier: 0.0356 (0.0226)  loss_box_reg: 0.0283 (0.0241)  loss_objectness: 0.0413 (0.0497)  loss_rpn_box_reg: 0.0122 (0.0156)  time: 0.2069  data: 0.0713  max mem: 8062
Validation:  Total time: 0:00:27 (0.2030 s / it)
classification metric over validation set: {'recall': 0.9789473684210527, 'precision': 0.41517857142857145, 'f1': 0.5830721003134797, 'sensitivity': 0.9789473684210527, 'specificity': 0.7056179775280899}
Average IoU over validation data set: 0.12
Accuracy over validation_data set: 0.754
Epoch: [5]  [  0/540]  eta: 0:03:15  lr: 0.000050  loss: 0.0709 (0.0709)  loss_classifier: 0.0022 (0.0022)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0544 (0.0544)  loss_rpn_box_reg: 0.0143 (0.0143)  time: 0.3619  data: 0.0915  max mem: 8062
Epoch: [5]  [100/540]  eta: 0:02:35  lr: 0.000050  loss: 0.2925 (0.3097)  loss_classifier: 0.1717 (0.2188)  loss_box_reg: 0.0260 (0.0246)  loss_objectness: 0.0419 (0.0505)  loss_rpn_box_reg: 0.0157 (0.0157)  time: 0.3538  data: 0.0889  max mem: 8062
Epoch: [5]  [200/540]  eta: 0:02:00  lr: 0.000050  loss: 0.1362 (0.3057)  loss_classifier: 0.0546 (0.2132)  loss_box_reg: 0.0000 (0.0236)  loss_objectness: 0.0389 (0.0528)  loss_rpn_box_reg: 0.0116 (0.0160)  time: 0.3555  data: 0.0904  max mem: 8062
Epoch: [5]  [300/540]  eta: 0:01:25  lr: 0.000050  loss: 0.1866 (0.3073)  loss_classifier: 0.1196 (0.2160)  loss_box_reg: 0.0000 (0.0241)  loss_objectness: 0.0467 (0.0513)  loss_rpn_box_reg: 0.0162 (0.0159)  time: 0.3516  data: 0.0865  max mem: 8062
Epoch: [5]  [400/540]  eta: 0:00:49  lr: 0.000050  loss: 0.3054 (0.3167)  loss_classifier: 0.2050 (0.2252)  loss_box_reg: 0.0285 (0.0249)  loss_objectness: 0.0546 (0.0508)  loss_rpn_box_reg: 0.0172 (0.0159)  time: 0.3555  data: 0.0897  max mem: 8062
Epoch: [5]  [500/540]  eta: 0:00:14  lr: 0.000050  loss: 0.1169 (0.3132)  loss_classifier: 0.0433 (0.2219)  loss_box_reg: 0.0000 (0.0243)  loss_objectness: 0.0384 (0.0510)  loss_rpn_box_reg: 0.0121 (0.0159)  time: 0.3528  data: 0.0893  max mem: 8062
Epoch: [5]  [539/540]  eta: 0:00:00  lr: 0.000050  loss: 0.1499 (0.3099)  loss_classifier: 0.0652 (0.2190)  loss_box_reg: 0.0000 (0.0241)  loss_objectness: 0.0413 (0.0508)  loss_rpn_box_reg: 0.0149 (0.0159)  time: 0.3549  data: 0.0900  max mem: 8062
Epoch: [5] Total time: 0:03:11 (0.3553 s / it)
Validation:   [  0/135]  eta: 0:00:25  loss: 0.1072 (0.1072)  loss_classifier: 0.0238 (0.0238)  loss_box_reg: 0.0240 (0.0240)  loss_objectness: 0.0347 (0.0347)  loss_rpn_box_reg: 0.0247 (0.0247)  time: 0.1877  data: 0.0550  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.0590 (0.1084)  loss_classifier: 0.0088 (0.0214)  loss_box_reg: 0.0000 (0.0246)  loss_objectness: 0.0370 (0.0471)  loss_rpn_box_reg: 0.0131 (0.0153)  time: 0.2046  data: 0.0683  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.1201 (0.1076)  loss_classifier: 0.0218 (0.0212)  loss_box_reg: 0.0253 (0.0239)  loss_objectness: 0.0433 (0.0469)  loss_rpn_box_reg: 0.0137 (0.0156)  time: 0.1972  data: 0.0629  max mem: 8062
Validation:  Total time: 0:00:27 (0.2033 s / it)
classification metric over validation set: {'recall': 0.9473684210526315, 'precision': 0.4945054945054945, 'f1': 0.6498194945848376, 'sensitivity': 0.9473684210526315, 'specificity': 0.7932584269662921}
Average IoU over validation data set: 0.11
Accuracy over validation_data set: 0.820
Epoch: [6]  [  0/540]  eta: 0:03:17  lr: 0.000050  loss: 0.0356 (0.0356)  loss_classifier: 0.0028 (0.0028)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0281 (0.0281)  loss_rpn_box_reg: 0.0047 (0.0047)  time: 0.3663  data: 0.0942  max mem: 8062
Epoch: [6]  [100/540]  eta: 0:02:34  lr: 0.000050  loss: 0.3164 (0.3285)  loss_classifier: 0.2318 (0.2417)  loss_box_reg: 0.0188 (0.0259)  loss_objectness: 0.0328 (0.0465)  loss_rpn_box_reg: 0.0106 (0.0144)  time: 0.3477  data: 0.0833  max mem: 8062
Epoch: [6]  [200/540]  eta: 0:02:00  lr: 0.000050  loss: 0.2108 (0.3124)  loss_classifier: 0.1249 (0.2249)  loss_box_reg: 0.0106 (0.0251)  loss_objectness: 0.0411 (0.0476)  loss_rpn_box_reg: 0.0149 (0.0148)  time: 0.3695  data: 0.1047  max mem: 8062
Epoch: [6]  [300/540]  eta: 0:01:24  lr: 0.000050  loss: 0.2212 (0.3155)  loss_classifier: 0.1656 (0.2262)  loss_box_reg: 0.0222 (0.0259)  loss_objectness: 0.0414 (0.0482)  loss_rpn_box_reg: 0.0165 (0.0151)  time: 0.3533  data: 0.0879  max mem: 8062
Epoch: [6]  [400/540]  eta: 0:00:49  lr: 0.000050  loss: 0.1455 (0.3079)  loss_classifier: 0.0925 (0.2182)  loss_box_reg: 0.0004 (0.0249)  loss_objectness: 0.0351 (0.0491)  loss_rpn_box_reg: 0.0151 (0.0158)  time: 0.3551  data: 0.0925  max mem: 8062
Epoch: [6]  [500/540]  eta: 0:00:14  lr: 0.000050  loss: 0.2037 (0.2988)  loss_classifier: 0.1245 (0.2098)  loss_box_reg: 0.0000 (0.0241)  loss_objectness: 0.0510 (0.0491)  loss_rpn_box_reg: 0.0166 (0.0159)  time: 0.3551  data: 0.0906  max mem: 8062
Epoch: [6]  [539/540]  eta: 0:00:00  lr: 0.000050  loss: 0.1857 (0.3015)  loss_classifier: 0.1315 (0.2116)  loss_box_reg: 0.0122 (0.0243)  loss_objectness: 0.0463 (0.0497)  loss_rpn_box_reg: 0.0147 (0.0160)  time: 0.3488  data: 0.0836  max mem: 8062
Epoch: [6] Total time: 0:03:11 (0.3551 s / it)
Validation:   [  0/135]  eta: 0:00:26  loss: 0.1123 (0.1123)  loss_classifier: 0.0288 (0.0288)  loss_box_reg: 0.0376 (0.0376)  loss_objectness: 0.0367 (0.0367)  loss_rpn_box_reg: 0.0091 (0.0091)  time: 0.1934  data: 0.0608  max mem: 8062
Validation:   [100/135]  eta: 0:00:07  loss: 0.0771 (0.1066)  loss_classifier: 0.0130 (0.0216)  loss_box_reg: 0.0120 (0.0239)  loss_objectness: 0.0368 (0.0462)  loss_rpn_box_reg: 0.0106 (0.0149)  time: 0.1991  data: 0.0636  max mem: 8062
Validation:   [134/135]  eta: 0:00:00  loss: 0.0963 (0.1071)  loss_classifier: 0.0159 (0.0209)  loss_box_reg: 0.0265 (0.0234)  loss_objectness: 0.0409 (0.0473)  loss_rpn_box_reg: 0.0145 (0.0155)  time: 0.2046  data: 0.0698  max mem: 8062
Validation:  Total time: 0:00:27 (0.2024 s / it)
classification metric over validation set: {'recall': 0.968421052631579, 'precision': 0.45320197044334976, 'f1': 0.6174496644295302, 'sensitivity': 0.968421052631579, 'specificity': 0.750561797752809}
Average IoU over validation data set: 0.13
Accuracy over validation_data set: 0.789
Epoch: [7]  [  0/540]  eta: 0:03:23  lr: 0.000050  loss: 0.0295 (0.0295)  loss_classifier: 0.0048 (0.0048)  loss_box_reg: 0.0000 (0.0000)  loss_objectness: 0.0202 (0.0202)  loss_rpn_box_reg: 0.0045 (0.0045)  time: 0.3771  data: 0.0954  max mem: 8062
Epoch: [7]  [100/540]  eta: 0:02:37  lr: 0.000050  loss: 0.1518 (0.2485)  loss_classifier: 0.0937 (0.1719)  loss_box_reg: 0.0172 (0.0197)  loss_objectness: 0.0349 (0.0425)  loss_rpn_box_reg: 0.0117 (0.0143)  time: 0.3594  data: 0.0931  max mem: 8062
Epoch: [7]  [200/540]  eta: 0:02:01  lr: 0.000050  loss: 0.1707 (0.2924)  loss_classifier: 0.1236 (0.2069)  loss_box_reg: 0.0124 (0.0233)  loss_objectness: 0.0398 (0.0464)  loss_rpn_box_reg: 0.0146 (0.0158)  time: 0.3548  data: 0.0897  max mem: 8062
Epoch: [7]  [300/540]  eta: 0:01:25  lr: 0.000050  loss: 0.1174 (0.2875)  loss_classifier: 0.0352 (0.2006)  loss_box_reg: 0.0000 (0.0226)  loss_objectness: 0.0416 (0.0486)  loss_rpn_box_reg: 0.0133 (0.0157)  time: 0.3465  data: 0.0809  max mem: 8062
Epoch: [7]  [400/540]  eta: 0:00:49  lr: 0.000050  loss: 0.2030 (0.3027)  loss_classifier: 0.1568 (0.2140)  loss_box_reg: 0.0121 (0.0239)  loss_objectness: 0.0302 (0.0489)  loss_rpn_box_reg: 0.0128 (0.0158)  time: 0.3581  data: 0.0933  max mem: 8062
Epoch: [7]  [500/540]  eta: 0:00:14  lr: 0.000050  loss: 0.1061 (0.3032)  loss_classifier: 0.0277 (0.2147)  loss_box_reg: 0.0000 (0.0243)  loss_objectness: 0.0411 (0.0482)  loss_rpn_box_reg: 0.0162 (0.0159)  time: 0.3597  data: 0.0927  max mem: 8062
Epoch: [7]  [539/540]  eta: 0:00:00  lr: 0.000050  loss: 0.1768 (0.3041)  loss_classifier: 0.0901 (0.2155)  loss_box_reg: 0.0000 (0.0245)  loss_objectness: 0.0409 (0.0482)  loss_rpn_box_reg: 0.0145 (0.0159)  time: 0.3511  data: 0.0871  max mem: 8062
Epoch: [7] Total time: 0:03:12 (0.3559 s / it)
Validation:   [  0/135]  eta: 0:00:32  loss: 0.1042 (0.1042)  loss_classifier: 0.0166 (0.0166)  loss_box_reg: 0.0301 (0.0301)  loss_objectness: 0.0368 (0.0368)  loss_rpn_box_reg: 0.0207 (0.0207)  time: 0.2374  data: 0.0999  max mem: 8062
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-74-ab38e891ddf7> in <module>()
      8     # Compute losses over the validation set
      9     with torch.no_grad():
---> 10       val_his_ = validate_one_epoch(model, val_loader, device, print_freq=100)
     11       d, average_iou, acc = evaluate(model,val_loader,device, batch_size)
     12     val_his_['f1'] = d['f1']

<ipython-input-69-b1b7e00e2c7d> in validate_one_epoch(model, data_loader, device, print_freq)
      8     metric_logger = MetricLogger(delimiter="  ")
      9     header = "Validation: "
---> 10     for i, values in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
     11         images, targets, weights = values
     12         images = list(image.to(device) for image in images)

<ipython-input-14-bc81f98c95f5> in log_every(self, iterable, print_freq, header)
    207             ])
    208         MB = 1024.0 * 1024.0
--> 209         for obj in iterable:
    210             data_time.update(time.time() - end)
    211             yield obj

/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    361 
    362     def __next__(self):
--> 363         data = self._next_data()
    364         self._num_yielded += 1
    365         if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
    401     def _next_data(self):
    402         index = self._next_index()  # may raise StopIteration
--> 403         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    404         if self._pin_memory:
    405             data = _utils.pin_memory.pin_memory(data)

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
     42     def fetch(self, possibly_batched_index):
     43         if self.auto_collation:
---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
     45         else:
     46             data = self.dataset[possibly_batched_index]

<ipython-input-27-5572cce509b1> in __getitem__(self, idx)
     34         image_path = self.info_df.loc[idx, "image_path"]
     35         img_name = os.path.join(self.image_dir,image_path)
---> 36         image = skimage.io.imread(img_name)
     37 
     38         labels = self.info_df.loc[idx, "is_tomato"]

/usr/local/lib/python3.6/dist-packages/skimage/io/_io.py in imread(fname, as_gray, plugin, **plugin_args)
     46 
     47     with file_or_url_context(fname) as fname:
---> 48         img = call_plugin('imread', fname, plugin=plugin, **plugin_args)
     49 
     50     if not hasattr(img, 'ndim'):

/usr/local/lib/python3.6/dist-packages/skimage/io/manage_plugins.py in call_plugin(kind, *args, **kwargs)
    208                                (plugin, kind))
    209 
--> 210     return func(*args, **kwargs)
    211 
    212 

/usr/local/lib/python3.6/dist-packages/skimage/io/_plugins/imageio_plugin.py in imread(*args, **kwargs)
      8 @wraps(imageio_imread)
      9 def imread(*args, **kwargs):
---> 10     return np.asarray(imageio_imread(*args, **kwargs))

/usr/local/lib/python3.6/dist-packages/imageio/core/functions.py in imread(uri, format, **kwargs)
    219 
    220     # Get reader and read first
--> 221     reader = read(uri, format, "i", **kwargs)
    222     with reader:
    223         return reader.get_data(0)

/usr/local/lib/python3.6/dist-packages/imageio/core/functions.py in get_reader(uri, format, mode, **kwargs)
    141 
    142     # Return its reader object
--> 143     return format.get_reader(request)
    144 
    145 

/usr/local/lib/python3.6/dist-packages/imageio/core/format.py in get_reader(self, request)
    172                 "Format %s cannot read in mode %r" % (self.name, select_mode)
    173             )
--> 174         return self.Reader(self, request)
    175 
    176     def get_writer(self, request):

/usr/local/lib/python3.6/dist-packages/imageio/core/format.py in __init__(self, format, request)
    222             self._request = request
    223             # Open the reader/writer
--> 224             self._open(**self.request.kwargs.copy())
    225 
    226         @property

/usr/local/lib/python3.6/dist-packages/imageio/plugins/pillow.py in _open(self, pilmode, as_gray, exifrotate)
    404     class Reader(PillowFormat.Reader):
    405         def _open(self, pilmode=None, as_gray=False, exifrotate=True):
--> 406             return PillowFormat.Reader._open(self, pilmode=pilmode, as_gray=as_gray)
    407 
    408         def _get_file(self):

/usr/local/lib/python3.6/dist-packages/imageio/plugins/pillow.py in _open(self, pilmode, as_gray)
    123             if hasattr(Image, "_decompression_bomb_check"):
    124                 Image._decompression_bomb_check(self._im.size)
--> 125             pil_try_read(self._im)
    126             # Store args
    127             self._kwargs = dict(

/usr/local/lib/python3.6/dist-packages/imageio/plugins/pillow.py in pil_try_read(im)
    499     try:
    500         # this will raise an IOError if the file is not readable
--> 501         im.getdata()[0]
    502     except IOError as e:
    503         site = "http://pillow.readthedocs.io/en/latest/installation.html"

/usr/local/lib/python3.6/dist-packages/PIL/Image.py in getdata(self, band)
   1247         """
   1248 
-> 1249         self.load()
   1250         if band is not None:
   1251             return self.im.getband(band)

/usr/local/lib/python3.6/dist-packages/PIL/ImageFile.py in load(self)
    231                         while True:
    232                             try:
--> 233                                 s = read(self.decodermaxblock)
    234                             except (IndexError, struct.error):
    235                                 # truncated png/gif

/usr/local/lib/python3.6/dist-packages/PIL/JpegImagePlugin.py in load_read(self, read_bytes)
    395         so libjpeg can finish decoding
    396         """
--> 397         s = self.fp.read(read_bytes)
    398 
    399         if not s and ImageFile.LOAD_TRUNCATED_IMAGES:

KeyboardInterrupt: 
In [ ]:
for k in train_history:
    """ FILL HERE """
    plt.plot(np.arange(len(train_history[k]), dtype=int), train_history[k]/np.max(train_history[k]), label='Train')
    plt.plot(np.arange(len(val_history[k]), dtype=int), val_history[k]/np.max(val_history[k]), label='Validation')
    plt.title(k)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

Inference

In [ ]:
test_dataset = FoodDataset(image_dir = img_dir,info_df=test_df)

#retrieve model
PATH =  '/content/drive/My Drive/Foodvisor/challenge/model_zoo/Test_session_10.09 14h37_Epoch1_model.pth.tar'
model = torch.load(PATH)
model.to(device)
Out[ ]:
FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
        )
      )
      (layer2): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(512)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(128)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(128)
          (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(512)
          (relu): ReLU(inplace=True)
        )
      )
      (layer3): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(1024)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (3): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (4): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
        (5): Bottleneck(
          (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(256)
          (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(256)
          (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(1024)
          (relu): ReLU(inplace=True)
        )
      )
      (layer4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
            (1): FrozenBatchNorm2d(2048)
          )
        )
        (1): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048)
          (relu): ReLU(inplace=True)
        )
        (2): Bottleneck(
          (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(512)
          (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(512)
          (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(2048)
          (relu): ReLU(inplace=True)
        )
      )
    )
    (fpn): FeaturePyramidNetwork(
      (inner_blocks): ModuleList(
        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
        (2): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
        (3): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
      )
      (layer_blocks): ModuleList(
        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (extra_blocks): LastLevelMaxPool()
    )
  )
  (rpn): RegionProposalNetwork(
    (anchor_generator): AnchorGenerator()
    (head): RPNHead(
      (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
      (bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
    )
  )
  (roi_heads): RoIHeads(
    (box_roi_pool): MultiScaleRoIAlign()
    (box_head): TwoMLPHead(
      (fc6): Linear(in_features=12544, out_features=1024, bias=True)
      (fc7): Linear(in_features=1024, out_features=1024, bias=True)
    )
    (box_predictor): FastRCNNPredictor(
      (cls_score): Linear(in_features=1024, out_features=2, bias=True)
      (bbox_pred): Linear(in_features=1024, out_features=8, bias=True)
    )
  )
)
In [ ]:
test_loader = torch.utils.data.DataLoader(test_dataset, 
                                           batch_size=1, shuffle=True,sampler=None,
                                           collate_fn=collate_fn,drop_last=True)

def has_tomatoes(img_path, model, nms_thres=0.001,device='cuda'):

    model.to(device)
    model.eval()
    image = skimage.io.imread(img_path)
    resize_transform = A.Resize(300,300)(image=image)
    image = resize_transform['image']
    simple_transform = simple_preprocess()(image= image)
    image = simple_transform['image']
    
    image = image.unsqueeze(0)
    with torch.no_grad():
      prediction = model(image.to(device))
    boxes = to_numpy(prediction[0]['boxes'])
    scores = to_numpy(prediction[0]['scores'])
    #kept_boxes = non_max_suppression(boxes, scores, nms_thres)
    pred_label = to_numpy(prediction[0]["labels"])#[kept_boxes]
    return 1 in pred_label    
In [ ]:
for img_path in img_names[80:90]:
  start = time.time()
  fig = plt.figure(figsize=(5,5))
  img_path = os.path.join('assignment_imgs',img_path)
  b = has_tomatoes(img_path,model)
  end = time.time()
  print('prediction done in',end-start)
  print('is there a tomatoe?',b)
  plt.imshow(skimage.io.imread(img_path))
  plt.show()
prediction done in 0.058470726013183594
is there a tomatoe? True
prediction done in 0.06983804702758789
is there a tomatoe? False
prediction done in 0.059403419494628906
is there a tomatoe? False
prediction done in 0.07090973854064941
is there a tomatoe? False
prediction done in 0.06683158874511719
is there a tomatoe? False
prediction done in 0.0836629867553711
is there a tomatoe? False
prediction done in 0.0716085433959961
is there a tomatoe? False
prediction done in 0.07543230056762695
is there a tomatoe? True
prediction done in 0.07513570785522461
is there a tomatoe? False
prediction done in 0.06154680252075195
is there a tomatoe? False
In [ ]:
start = time.time()
evaluate(model,test_loader,'cuda',1)
end = time.time()
print('time per prediction:',(end-start)/len(test_df))
In [13]:
!pip install nbconvert
Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (5.6.1)
Requirement already satisfied: nbformat>=4.4 in /usr/local/lib/python3.6/dist-packages (from nbconvert) (5.0.7)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert) (0.3)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert) (1.4.2)
Requirement already satisfied: bleach in /usr/local/lib/python3.6/dist-packages (from nbconvert) (3.2.1)
Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert) (0.4.4)
Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert) (4.3.3)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert) (0.8.4)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert) (0.6.0)
Requirement already satisfied: jinja2>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbconvert) (2.11.2)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from nbconvert) (4.6.3)
Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from nbconvert) (2.6.1)
Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.4->nbconvert) (0.2.0)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.4->nbconvert) (2.6.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert) (20.4)
Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert) (0.5.1)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert) (1.15.0)
Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from traitlets>=4.2->nbconvert) (4.4.2)
Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2>=2.4->nbconvert) (1.1.1)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->bleach->nbconvert) (2.4.7)
In [18]:
%cd /content/drive/My Drive/Colab Notebooks/
/content/drive/My Drive/Colab Notebooks
In [ ]:
!jupyter nbconvert --to html   Foodvisor_challenge.ipynb
[NbConvertApp] Converting notebook Foodvisor_challenge.ipynb to html
In [ ]: